# Load the dataset from a CSV file that the user picks interactively.
df <- read.csv(file.choose())
head(df)

# Turn every character column into a factor; all other columns are kept as-is.
df[] <- lapply(df, function(column) {
  if (is.character(column)) as.factor(column) else column
})
str(df)
## 'data.frame': 149 obs. of 20 variables:
## $ Country.name : Factor w/ 149 levels "Afghanistan",..: 41 34 129 55 97 104 128 79 98 7 ...
## $ Regional.indicator : Factor w/ 10 levels "Central and Eastern Europe",..: 10 10 10 10 10 10 10 10 6 10 ...
## $ Ladder.score : num 7.84 7.62 7.57 7.55 7.46 ...
## $ Standard.error.of.ladder.score : num 0.032 0.035 0.036 0.059 0.027 0.035 0.036 0.037 0.04 0.036 ...
## $ upperwhisker : num 7.9 7.69 7.64 7.67 7.52 ...
## $ lowerwhisker : num 7.78 7.55 7.5 7.44 7.41 ...
## $ Logged.GDP.per.capita : num 10.8 10.9 11.1 10.9 10.9 ...
## $ Social.support : num 0.954 0.954 0.942 0.983 0.942 0.954 0.934 0.908 0.948 0.934 ...
## $ Healthy.life.expectancy : num 72 72.7 74.4 73 72.4 73.3 72.7 72.6 73.4 73.3 ...
## $ Freedom.to.make.life.choices : num 0.949 0.946 0.919 0.955 0.913 0.96 0.945 0.907 0.929 0.908 ...
## $ Generosity : num -0.098 0.03 0.025 0.16 0.175 0.093 0.086 -0.034 0.134 0.042 ...
## $ Perceptions.of.corruption : num 0.186 0.179 0.292 0.673 0.338 0.27 0.237 0.386 0.242 0.481 ...
## $ Ladder.score.in.Dystopia : num 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 ...
## $ Explained.by..Log.GDP.per.capita : num 1.45 1.5 1.57 1.48 1.5 ...
## $ Explained.by..Social.support : num 1.11 1.11 1.08 1.17 1.08 ...
## $ Explained.by..Healthy.life.expectancy : num 0.741 0.763 0.816 0.772 0.753 0.782 0.763 0.76 0.785 0.782 ...
## $ Explained.by..Freedom.to.make.life.choices: num 0.691 0.686 0.653 0.698 0.647 0.703 0.685 0.639 0.665 0.64 ...
## $ Explained.by..Generosity : num 0.124 0.208 0.204 0.293 0.302 0.249 0.244 0.166 0.276 0.215 ...
## $ Explained.by..Perceptions.of.corruption : num 0.481 0.485 0.413 0.17 0.384 0.427 0.448 0.353 0.445 0.292 ...
## $ Dystopia...residual : num 3.25 2.87 2.84 2.97 2.8 ...
# Total number of missing values across the whole data frame.
sum(is.na(df))
## [1] 0
# Drop rows that are exact duplicates of another row.
df <- distinct(df)
# Rename the main indicators (new name = old name).
df <- df %>%
  rename(
    Happiness.score = Ladder.score,
    Standard.error.of.happiness = Standard.error.of.ladder.score,
    Life.expectancy = Healthy.life.expectancy,
    Freedom.of.choices = Freedom.to.make.life.choices
  )
colnames(df)
## [1] "Country.name"
## [2] "Regional.indicator"
## [3] "Happiness.score"
## [4] "Standard.error.of.happiness"
## [5] "upperwhisker"
## [6] "lowerwhisker"
## [7] "Logged.GDP.per.capita"
## [8] "Social.support"
## [9] "Life.expectancy"
## [10] "Freedom.of.choices"
## [11] "Generosity"
## [12] "Perceptions.of.corruption"
## [13] "Ladder.score.in.Dystopia"
## [14] "Explained.by..Log.GDP.per.capita"
## [15] "Explained.by..Social.support"
## [16] "Explained.by..Healthy.life.expectancy"
## [17] "Explained.by..Freedom.to.make.life.choices"
## [18] "Explained.by..Generosity"
## [19] "Explained.by..Perceptions.of.corruption"
## [20] "Dystopia...residual"
# Drop the Dystopia columns, the "Explained by" columns, the whisker bounds
# and the standard error; every other column is kept.
df <- select(
  df,
  -c(
    "Ladder.score.in.Dystopia",
    "Dystopia...residual",
    "Explained.by..Log.GDP.per.capita",
    "Explained.by..Healthy.life.expectancy",
    "Explained.by..Generosity",
    "Explained.by..Social.support",
    "Explained.by..Freedom.to.make.life.choices",
    "Explained.by..Perceptions.of.corruption",
    "upperwhisker",
    "lowerwhisker",
    "Standard.error.of.happiness"
  )
)
colnames(df)
## [1] "Country.name" "Regional.indicator"
## [3] "Happiness.score" "Logged.GDP.per.capita"
## [5] "Social.support" "Life.expectancy"
## [7] "Freedom.of.choices" "Generosity"
## [9] "Perceptions.of.corruption"
# Country names used to pick out the ASEAN subset.
asean <- c(
  "Indonesia", "Malaysia", "Philippines", "Singapore", "Thailand",
  "Brunei Darussalam", "Vietnam", "Laos", "Myanmar", "Cambodia"
)
# Keep only rows whose country name appears in the list; names that are not
# present in the data simply match nothing.
df_asean <- filter(df, Country.name %in% asean)
df_asean
# Copy of df with four indicators multiplied by 100
# (presumably to read them as percentages -- confirm the original scale).
df_percentage <- mutate(
  df,
  Social.support = Social.support * 100,
  Freedom.of.choices = Freedom.of.choices * 100,
  Generosity = Generosity * 100,
  Perceptions.of.corruption = Perceptions.of.corruption * 100
)
df_percentage
# Pairwise correlations between all numeric columns, rounded to two decimals,
# then melted into one row per variable pair for plotting.
num_data <- select_if(df_percentage, is.numeric)
corr_matrix <- round(cor(num_data), digits = 2)
melted_corr_matrix <- melt(corr_matrix)
head(melted_corr_matrix)
# Interactive correlation heatmap: one tile per variable pair, labelled with
# the rounded coefficient. The `text` aesthetic is not drawn by ggplot; it is
# only used as the hover tooltip by ggplotly().
heatmap <- ggplot(
  data = melted_corr_matrix,
  aes(
    x = Var1, y = Var2, fill = value,
    text = paste(Var1, "and", Var2, "<br>Correlation :", value)
  )
) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 4) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap", x = "", y = "") +
  # `limits` is written out in full instead of relying on partial argument
  # matching of `limit`.
  # NOTE(review): `low` and `high` are both shades of green, so the sign of a
  # correlation cannot be told from the hue alone -- confirm this is intended.
  scale_fill_gradient2(
    low = "#56AD6A", high = "#269B47", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Correlation"
  )
ggplotly(heatmap, tooltip = "text")
Focusing on Happiness Score in this heatmap, we can see that GDP per Capita, Social Support, Life Expectancy, and Freedom of Choices are positively correlated with Happiness Score, whereas Perceptions of Corruption is negatively correlated with it. Generosity and Happiness Score show essentially no linear correlation, because the coefficient, 0.02, is close to 0. The strongest correlation with Happiness Score is GDP per Capita, with a value of 0.79.
# Bar chart of the happiness score for each ASEAN country.
bar_chart <- ggplot(df_asean, aes(x = Country.name, y = Happiness.score)) +
  geom_col(fill = "#97CBA1") +
  labs(
    title = "Happiness Score of ASEAN Countries in 2021",
    x = "Country",
    y = "Happiness Score"
  ) +
  # The complete theme goes first: adding theme_minimal() after theme() would
  # replace the tweak below. The rotation targets the axis tick labels
  # (axis.text.x), not the axis line.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(bar_chart)
In this bar chart, we can see that Singapore is the ASEAN country with the highest happiness score, at 6.377. The ASEAN country with the lowest happiness score is Myanmar, at 4.426.
# Scatter plot of happiness score against logged GDP per capita, with a
# straight-line (lm) fit and no confidence band.
lg_2 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Logged.GDP.per.capita)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "#269B47") +
  # The plotted column is the logged GDP per capita, so the axis label says so.
  labs(
    title = "Happiness Score vs GDP per Capita",
    x = "Happiness Score",
    y = "Logged GDP per Capita"
  ) +
  theme_minimal()
ggplotly(lg_2)
## `geom_smooth()` using formula = 'y ~ x'
From this linear regression, we can see that Happiness Score and GDP per Capita are positively correlated. This means that the higher the GDP per Capita, the higher the Happiness Score in that country tends to be.
# Scatter plot of happiness score against life expectancy, with a
# straight-line (lm) fit and no confidence band.
lg_3 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Life.expectancy)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm", colour = "#269B47") +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Life Expectancy",
    x = "Happiness Score",
    y = "Life Expectancy"
  )
ggplotly(lg_3)
## `geom_smooth()` using formula = 'y ~ x'
From this linear regression, we can see that Happiness Score and Life Expectancy are positively correlated. This means that the higher the Life Expectancy, the higher the Happiness Score in that country tends to be.
# Scatter plot of happiness score against freedom of choices, with a
# straight-line (lm) fit and no confidence band.
lg_4 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Freedom.of.choices)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm", colour = "#269B47") +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Freedom to Make Life Choices",
    x = "Happiness Score",
    y = "Freedom to Make Life Choices"
  )
ggplotly(lg_4)
## `geom_smooth()` using formula = 'y ~ x'
From this linear regression, we can see that Happiness Score and Freedom to Make Life Choices are positively correlated. This means that the higher the Freedom to Make Life Choices, the higher the Happiness Score in that country tends to be.
# Scatter plot of happiness score against generosity, with a
# straight-line (lm) fit and no confidence band.
lg_5 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Generosity)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm", colour = "#269B47") +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Generosity",
    x = "Happiness Score",
    y = "Generosity"
  )
ggplotly(lg_5)
## `geom_smooth()` using formula = 'y ~ x'
From this linear regression, we can see that Happiness Score and Generosity are essentially uncorrelated. In other words, Generosity shows no linear relationship with Happiness Score in this data.
# Scatter plot of happiness score against perceptions of corruption, with a
# straight-line (lm) fit and no confidence band.
lg_6 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Perceptions.of.corruption)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm", colour = "#269B47") +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Perception of Corruption",
    x = "Happiness Score",
    y = "Perception of Corruption"
  )
ggplotly(lg_6)
## `geom_smooth()` using formula = 'y ~ x'
From this linear regression, we can see that Happiness Score and Perception of Corruption are negatively correlated. This means that the lower the Perception of Corruption, the higher the Happiness Score in that country tends to be.
# Box plot of happiness score per region; region labels on the x axis are
# rotated 45 degrees.
box_plot <- ggplot(
  data = df_percentage,
  mapping = aes(x = Regional.indicator, y = Happiness.score)
) +
  geom_boxplot(fill = "#56AD6A") +
  labs(
    title = "Boxplot of Happiness per Regional",
    x = "Regional",
    y = "Happiness Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(box_plot)
In this box plot, we can see that the Western Europe region and the North America and ANZ region have the highest median happiness scores, while South Asia has the lowest median happiness score. We can also see that North America and ANZ, as well as Sub-Saharan Africa, exhibit the highest variability in happiness scores, while East Asia and Central and Eastern Europe show the least variability.
# World choropleth of happiness score. Countries are matched by name, and the
# hover text shows the country, its region and its score.
maps_happiness <- plot_ly(
  data = df_percentage,
  type = "choropleth",
  locations = ~Country.name,
  locationmode = "country names",
  z = ~Happiness.score,
  text = ~paste(
    Country.name,
    "<br>Region:", Regional.indicator,
    "<br>Happiness Score:", Happiness.score
  ),
  hoverinfo = "text",
  colorscale = "Viridis",
  marker = list(line = list(color = "white", width = 0.5)),
  colorbar = list(title = "Happiness Score")
) %>%
  layout(
    title = "World Happiness Report 2021",
    geo = list(
      showframe = FALSE,
      showcoastlines = FALSE,
      projection = list(type = "equirectangular")
    )
  )
maps_happiness
In this Happiness Score map, countries are color-coded by happiness: yellow indicates the happiest countries, while deep blue represents the least happy ones. We can see that Finland is the happiest country in the world and Afghanistan is the least happy country in the world.
# World choropleth of life expectancy. Countries are matched by name, and the
# hover text shows the country, its region and its life expectancy.
maps_life <- plot_ly(
  data = df_percentage,
  type = "choropleth",
  locations = ~Country.name,
  locationmode = "country names",
  z = ~Life.expectancy,
  text = ~paste(
    Country.name,
    "<br>Region:", Regional.indicator,
    "<br>Life Expectancy:", Life.expectancy
  ),
  hoverinfo = "text",
  colorscale = "YlGnBu",
  marker = list(line = list(color = "white", width = 0.5)),
  colorbar = list(title = "Life Expectancy")
) %>%
  layout(
    title = "World Life Expectancy Report 2021",
    geo = list(
      showframe = FALSE,
      showcoastlines = FALSE,
      projection = list(type = "equirectangular")
    )
  )
maps_life
In this Life Expectancy map, countries are color-coded by life expectancy: pastel yellow indicates the countries with the highest life expectancy, while deep blue represents those with the lowest. We can see that Australia has the highest life expectancy in the world and Chad has the lowest life expectancy in the world.